* Start from an empty dataset
clear

* Fix the random-number seed so the random draws further down are reproducible
set seed 1073741823

* Install the user-written -randomselect- command from SSC; -capture- suppresses any error from the install
capture ssc install randomselect


* ########## WAVE 1 ##########	

	
* --- First session: load the raw wave-1 export (first CSV line supplies the variable names) ---
insheet using "${pathdata_baseline}/BaselineExperiment_raw_wave1.csv", names

* Discard the first two data rows (presumably additional header rows of the survey export -- confirm)
drop in 1/2

* Convert every string column that contains only numbers into a numeric variable
destring _all, replace

	* Keep only respondents whose writedowninfo answer is coded 2
	keep if writedowninfo == 2

	// ONLY NEEDED FOR BONUS PAYMENTS
	// Draw the participants selected for a bonus. -randomselect- is the user-written command installed from SSC
	// at the top of this file; it creates the flag variable sample. prop(0.1) presumably marks a random 10% of
	// the observations with sample == 1 -- confirm against the command's help file.
	// NOTE: the draws in this section consume random numbers in sequence, so do not reorder them.
	randomselect, gen(sample) prop(0.1)
	tab sample

	// Randomly draw participants who receive a bonus (30$) for the belief stated in 1 and 2, respectively.
	// Among sample == 1 a share of 0.5 is flagged; the flag is then recoded so that flagged participants
	// end up with bonus_belief == 2 and unflagged selected participants with bonus_belief == 1.
	randomselect if sample == 1, gen(bonus_belief) prop(0.5)
	replace bonus_belief = 2 if bonus_belief ==1
	replace bonus_belief = 1 if bonus_belief ==0 & sample ==1

	// Randomly draw the product for which the bonus payment (30$) is carried out:
	// first flag bicycle with prop(0.333) among sample == 1, then flag restaurant with prop(0.5)
	// among the selected participants not flagged for bicycle; the remaining selected participants get videogame.

	randomselect if sample == 1, gen(bicycle) prop(0.333)
	randomselect if sample == 1 & bicycle == 0, gen(restaurant) prop(0.5)

	// bonusproduct is a string: "videogame", "bicycle" or "restaurant"; it stays empty unless sample == 1
	gen bonusproduct = ""
	replace bonusproduct = "videogame" if sample == 1 & bicycle ==0 & restaurant==0
	replace bonusproduct = "bicycle" if sample == 1 & bicycle ==1
	replace bonusproduct = "restaurant" if sample == 1 & restaurant ==1
	tab bonusproduct

	/* Restrict the dataset to the variables used further down */
	keep condition_* p_guess* task* prolific_pid ///
		positive_stat negative_stat tot_reviews* story_type* truepos_fraction* ///
		sample bonus_belief bonusproduct ///
		qid9 qid124 qid12 qid18 ///
		durationinseconds finished ///
		comprehension1 comprehension2 comprehension3 comprehension5 ///
		q202 screenedout writedowninfo ///
		v39 v44 _q230_pagesubmit

		* Readable names for the demographic items
		rename (qid9 qid124 qid12 qid18) (gender age education employment)

		* Time spent on the decision page for videogame, restaurant and bicycle
		rename (_q230_pagesubmit v39 v44) (pagetime_videogame pagetime_restaurant pagetime_bicycle)

		* Male: 1 if gender == 1, 0 if gender is 2 or 3, missing for any other value
		gen male = (gender == 1) if inlist(gender, 1, 2, 3)

		* Bachelor's degree or more: 1 for education codes 5-8, 0 for codes 1-4, missing otherwise
		gen college = inlist(education, 5, 6, 7, 8) if inlist(education, 1, 2, 3, 4, 5, 6, 7, 8)

		* Employment status: 1 for employment codes 1-2, 0 for codes 3-4, missing otherwise
		gen employed = inlist(employment, 1, 2) if inlist(employment, 1, 2, 3, 4)

		* Finished wave 1: employment is the last mandatory question in the survey,
		* so any valid answer to it (codes 1-4) marks a completed first session
		gen completed_wave1 = inlist(employment, 1, 2, 3, 4)

		
		

		* Keep a single row per Prolific ID (additional rows with the same ID are discarded)
		duplicates drop prolific_pid, force

		/* Wide to long: one row per participant and product */
		reshape long condition_ p_guess_ task_ truepos_fraction_ pagetime_, i(prolific_pid) j(product) string

		* Indicator for the two statistic conditions
		gen statisticcondition = inlist(condition_, "statistic_pos", "statistic_neg")

		* --- Inputs for the Bayesian posterior ---
		* Total number of reviews per product; -9999 remains for any other product value
		gen total_reviews = cond(product == "videogame", 14, ///
			cond(product == "restaurant", 19, ///
			cond(product == "bicycle", 17, -9999)))

		* Number of sampled reviews: tot_reviews_pos, falling back to tot_reviews_neg when it is missing
		gen sample_reviews = cond(tot_reviews_pos == ., tot_reviews_neg, tot_reviews_pos)

		* Number of positive sampled reviews: positive_stat, falling back to negative_stat when it is missing
		gen sample_positive_reviews = cond(positive_stat == ., negative_stat, positive_stat)

		* Story conditions: exactly one review is shown, positive in storyshort_pos and negative in storyshort_neg
		replace sample_reviews = 1 if inlist(condition_, "storyshort_pos", "storyshort_neg")
		replace sample_positive_reviews = (condition_ == "storyshort_pos") if inlist(condition_, "storyshort_pos", "storyshort_neg")

		* No-information condition: no reviews are shown
		replace sample_reviews = 0 if condition_ == "noinfo"
		replace sample_positive_reviews = 0 if condition_ == "noinfo"

		* Posterior: observed positive share of all reviews, plus the unobserved share of reviews
		* weighted by (positives + 1) / (sampled + 2)
		gen bayesian_posterior = sample_positive_reviews/total_reviews + (total_reviews-sample_reviews) / total_reviews * (sample_positive_reviews+1)/(sample_reviews+2)

		* Share of positive reviews within the sample (missing when no review was sampled)
		gen share_positive = sample_positive_reviews / sample_reviews
		
		
		* Numeric, value-labelled copy of the condition string, used as the grouping factor below
		encode condition_, gen(condition__)

		* Absolute distance of the Bayesian posterior from 0.5
		gen abs_belief_update = abs(bayesian_posterior - 0.5)

		* Mean absolute belief update by condition
		mean abs_belief_update, over(condition__)

		* One-way ANOVA of the absolute belief update on condition.
		* (The identical command used to be issued twice in a row; one run is sufficient
		* because the replay below only needs the most recent estimation results.)
		anova abs_belief_update condition__

		* Replay the ANOVA as a regression table, showing the base level of the factor
		regress, baselevels

* Store the long wave-1 data; it is merged with the wave-2 data later in this file
save "${pathdata_baseline}/temp/BaselineExperiment_long.dta", replace


* ########## Prolific IDs only ##########
* One row per non-empty Prolific ID, written to its own file
preserve
	keep prolific_pid
	drop if prolific_pid == ""
	duplicates drop
	save "${pathdata_baseline}/temp/BaselineExperiment_prolific_pid_only.dta", replace
restore


* IDs of participants with a statistic-condition row in which every sampled review is positive
* (these rows are treated as affected by a coding error further down)
preserve
	keep if statisticcondition == 1 & share_positive == 1
	keep prolific_pid
	export excel using "${pathdata_baseline}/temp/BaselineExperiment_Error_ids.xlsx", replace firstrow(variables)
restore


* ########## WAVE 2 ##########	
	
clear

/* Second (recall) session: load the raw wave-2 export */
insheet using "${pathdata_baseline}/BaselineExperiment_raw_wave2.csv", names

* Discard the first two data rows (presumably additional header rows of the survey export -- confirm)
drop in 1/2

destring _all, replace

	* Rename the wave-2 duration so it does not collide with the wave-1 variable of the same name
	rename durationinseconds durationinseconds2

	/* Variables to keep */
	keep beliefs_* cu_* prolific_pid screenedout ///
		_open_endedfirst v44 v50 _openended_second v27 v33 ///
		v26 v32 _beliefs_first durationinseconds2

	* Drop rows without an ID, then drop EVERY row of an ID that occurs more than once
	drop if prolific_pid == ""
	bysort prolific_pid: drop if _N > 1

	* CAUTION: WE NEED TO RECODE THIS BECAUSE SOMETHING WENT WRONG IN QUALTRICS. SPECIFICALLY, SOME BELIEFS
	* WERE CODED IN V26 AND V32 AND BELIEFS_FIRST. WE CORRECT THIS HERE by filling the missing beliefs from those columns.
	replace beliefs_videogame  = _beliefs_first if beliefs_videogame  == .
	replace beliefs_bicycle    = v26            if beliefs_bicycle    == .
	replace beliefs_restaurant = v32            if beliefs_restaurant == .

	drop v26 v32 _beliefs_first

	/* Wide to long: one row per participant and product */
	reshape long beliefs_ cu_, i(prolific_pid) j(product) string

	* Each product's open-ended answer sits in one of two columns; fill the first from the second when it is empty
	replace _open_endedfirst = _openended_second if _open_endedfirst == ""
	replace v44 = v27 if v44 == ""
	replace v50 = v33 if v50 == ""

	* Pick the answer that belongs to the product of the current row
	gen free_form = ""
	replace free_form = _open_endedfirst if product == "videogame"
	replace free_form = v44 if product == "bicycle"
	replace free_form = v50 if product == "restaurant"

	* The wide text columns are no longer needed
	drop _open_endedfirst v44 v50 _openended_second v27 v33

* Store the long wave-2 data
save "${pathdata_baseline}/temp/BaselineExperiment_long_followup.dta", replace

* ########## MERGE WAVE 1 and WAVE 2 ##########	
	

	/* Merge the wave-2 data in memory (master) with the saved long wave-1 data (using) */
	merge 1:1 prolific_pid product using "${pathdata_baseline}/temp/BaselineExperiment_long.dta"

* ########## CREATE DATA ON ATTRITION IN BETWEEN SUBJECT TREATMENTS ##########

	* Indicator: row found in both waves, i.e. the participant took part in the second session
	gen completed = (_merge == 3)

	* Between-subject treatment indicators
	gen mixed      = (story_type == "mixed")
	gen neutral    = (story_type == "neutral")
	gen consistent = (story_type == "consistent")

	* ##### Generate dataset with treatments and completion only
preserve
	* Rows that exist only in wave 2 carry no wave-1 information
	drop if _merge == 1

	* REMOVE SUBJECTS AFFECTED BY CODING ERROR:
	* drop the affected rows, then every participant left with fewer than three product rows
	drop if statisticcondition == 1 & share_positive == 1
	bysort prolific_pid: drop if _N < 3

	* Collapse to one row per distinct combination of ID, completion and treatment indicators
	keep prolific_pid completed mixed neutral consistent
	duplicates drop

	save "${pathdata_summary}/BaselineAttrition.dta", replace

restore

	* The treatment indicators were only needed for the attrition file
	drop mixed neutral consistent

	* Continue with rows observed in both waves
	keep if _merge == 3
	drop _merge

	* Convert string columns that contain only numbers
	destring _all, replace

	/* Exclusion criteria */
	* Participants flagged as screened out
	drop if screenedout == "True"
	* No-information rows in which the stated guess differs from 50
	drop if condition_ == "noinfo" & p_guess_ != 50

save "${pathdata_baseline}/temp/BaselineExperiment_merged.dta", replace

* ########## ADD VARIABLES TO MERGED DATA ##########


/* Variables for analysis */

	/* 1 - Effect variables */
	* Distance of the immediate guess and of the recalled belief from 50
	gen diff = 50 - p_guess_
	gen diff_recall = 50 - beliefs_

	* Signed so that movement in the direction of the information is positive:
	* guess minus 50 in the positive conditions, 50 minus guess otherwise
	gen effect = cond(inlist(condition_, "storyshort_pos", "statistic_pos"), p_guess_ - 50, diff)
	gen effect_recall = cond(inlist(condition_, "storyshort_pos", "statistic_pos"), beliefs_ - 50, diff_recall)

	/* Drop missing products */
	drop if p_guess_ == .

	* Everything up to -restore- works on a temporary copy; the data in memory are brought back afterwards
	preserve

	* The analysis files exclude the no-information rows
	keep if condition_ != "noinfo"

	/* Drop observations where updating is in the wrong direction */

		tabulate effect

		* Only statistic-condition rows are removed for wrong-direction updating
		drop if effect < 0 & inlist(condition_, "statistic_pos", "statistic_neg")
		* Participants left with a single row are removed as well
		by prolific_pid, sort: drop if _N == 1

		tabulate prolific_pid

* Version that still contains the rows affected by the coding error
save "${pathdata_baseline}/BaselineExperiment_withError.dta", replace


* REMOVE SUBJECTS AFFECTED BY CODING ERROR
* (affected rows first, then participants left with a single row)
drop if statisticcondition == 1 & share_positive == 1
by prolific_pid, sort: drop if _N == 1

save "${pathdata_baseline}/BaselineExperiment.dta", replace


* ##### Save data for summary statistics

	* One row per distinct combination of ID and demographics
	keep prolific_pid male age college employed
	duplicates drop

	save "${pathdata_summary}/BaselineSummary.dta", replace


restore

* drop demographics as we won't need them in the future
	drop male age college employed

* ########## OPEN TEXT RESPONSES ##########

* Export the free-form recall texts together with the beliefs and condition information.
* The data in memory are left untouched (preserve / restore).
preserve

	keep prolific_pid  beliefs_   p_guess_ diff condition_ product free_form story_type

	* The egen function nwords() is not part of official Stata; it ships with the SSC package -egenmore-.
	* Install the package if the function cannot be found, so the script also runs on a clean installation.
	capture which _gnwords
	if _rc capture ssc install egenmore

	* Number of words in each free-form answer
	egen countvar = nwords(free_form)
export excel using "${pathdata_baseline}/temp/Baseline_free_form_data.xlsx", replace first(var)

restore


* ########## Data cleaning ##########
clear

* Load the hand-coded recall data; the first spreadsheet row supplies the variable names
import excel using "${pathdata_baseline}/BaselineExperiment_coded_recall_merged.xlsx", firstrow

* Discard the first data row before converting to numeric
drop in 1

destring _all, replace

* Coding variables; each exists in three versions: plain, with suffix _a and with suffix _b
local codes Nomemory Mentiontype Misremtype Mentionvalence Misremvalence Recallstatcorrectly ///
	MentionqualFactors Mentionfirst Recallimmediatebelief Fullconfusion ///
	Misremacrossscenarios Flagformiscoruncertaincodi

* A missing code is set to 0 in all three versions
foreach code of local codes {
	foreach v in `code' `code'_a `code'_b {
		replace `v' = 0 if `v' == .
	}
}

* Summary statistics of the plain (unsuffixed) codes
summarize `codes'

* Type of information shown: 0 = no info, 1 = statistic, 2 = story (missing for any other condition_)
gen id_type = . 
replace id_type = 1 if inlist(condition_,"statistic_neg", "statistic_pos" )
replace id_type = 2 if inlist(condition_,"storyshort_neg", "storyshort_pos" )
replace id_type = 0 if inlist(condition_,"noinfo" )

* Story type: 0 = neutral, 1 = mixed, 2 = consistent
gen id_story_type = .
replace id_story_type = 0 if inlist(story_type,"neutral")
replace id_story_type = 1 if inlist(story_type,"mixed")
replace id_story_type = 2 if inlist(story_type,"consistent")

* Valence of the information: 0 = negative, 1 = positive (missing for noinfo)
gen id_val = . 
replace id_val = 0 if  inlist(condition_,"statistic_neg", "storyshort_neg" )
replace id_val = 1 if  inlist(condition_,"statistic_pos", "storyshort_pos" )

* Condition code with an explicit, fixed mapping. -encode- numbers the categories alphabetically
* among the values that happen to be present, so the hard-coded labels and the 1-5 codes used below
* would be wrong whenever one condition is absent from the sheet. With all five conditions present
* the codes are the same as the ones -encode- produced.
gen long id_cond = .
replace id_cond = 1 if condition_ == "noinfo"
replace id_cond = 2 if condition_ == "statistic_neg"
replace id_cond = 3 if condition_ == "statistic_pos"
replace id_cond = 4 if condition_ == "storyshort_neg"
replace id_cond = 5 if condition_ == "storyshort_pos"
label define id_cond 1 "No info" 2 "Statistic negative" 3 "Statistic positive" 4 "Story negative" 5 "Story positive", replace
label values id_cond id_cond

* Misremembering rates by condition, among answers that mention the type / the valence
bysort condition_: sum Misremtype if Mentiontype ==1
bysort condition_: sum Misremvalence if Mentionvalence ==1

* The condition code is only kept for the consistent story type
replace id_cond = .  if story_type!="consistent"

label list

* Statistic conditions only: 0 = negative, 1 = positive
gen id_cond_stat = id_cond
replace id_cond_stat =. if inlist(id_cond, 1, 4, 5)
replace id_cond_stat = 0 if id_cond_stat ==2
replace id_cond_stat = 1 if id_cond_stat ==3


* Story conditions only: 0 = negative, 1 = positive
gen id_cond_story = id_cond
replace id_cond_story =. if inlist(id_cond, 1, 2, 3)
replace id_cond_story = 0 if id_cond_story ==4
replace id_cond_story = 1 if id_cond_story ==5

* -label define- only creates a value label; -label values- attaches it to the variable.
* The two labels below used to be defined but were never attached.
label define id_cond_story 0 "Story negative" 1 "Story positive", replace
label values id_cond_story id_cond_story

label define id_cond_stat 0 "Statistic negative" 1 "Statistic positive", replace
label values id_cond_stat id_cond_stat

* Indicator = 1 if the answer mentions both type and valence and misremembers neither
gen recallcombined = (Mentiontype == 1 & Misremtype == 0 & Mentionvalence == 1 & Misremvalence == 0)

* Indicator for the statistic conditions
gen statisticcondition = inlist(condition_, "statistic_neg", "statistic_pos")

* Indicator for the story conditions
gen story = inlist(condition_, "storyshort_neg", "storyshort_pos")

* Distance of the immediate guess and of the recalled belief from 50
gen diff = 50 - p_guess_
gen diff_recall = 50 - beliefs_

* Signed so that movement in the direction of the information is positive:
* guess minus 50 in the positive conditions, 50 minus guess otherwise
gen effect = cond(inlist(condition_, "storyshort_pos", "statistic_pos"), p_guess_ - 50, diff)
gen effect_recall = cond(inlist(condition_, "storyshort_pos", "statistic_pos"), beliefs_ - 50, diff_recall)


* Attach the experiment data to the coded recall data and keep only rows present in both.
* For variables that exist in both files, -merge- keeps the values of the data in memory.
merge 1:1 prolific_pid product using "${pathdata_baseline}/BaselineExperiment_withError.dta"
keep if _merge == 3
drop _merge


* Version that still contains the participants affected by the coding error
save "${pathdata_baseline}/BaselineExperimentRecall_withError.dta",replace

* REMOVE SUBJECTS AFFECTED BY CODING ERROR
* Load the list of affected IDs written earlier in this file.
import excel "${pathdata_baseline}/temp/BaselineExperiment_Error_ids.xlsx", sheet("Sheet1") firstrow clear
* -merge m:1- requires the key to be unique in the using data, so collapse repeated IDs first
duplicates drop prolific_pid, force
tempfile exclude_list
save `exclude_list'


use "${pathdata_baseline}/BaselineExperimentRecall_withError.dta", clear
* keep(master) retains only rows whose ID is NOT on the exclusion list (_merge == 1).
* Dropping just _merge == 3, as before, left every listed ID that does not occur in the recall data
* in the file as an otherwise empty row (_merge == 2).
merge m:1 prolific_pid using `exclude_list', keep(master) nogenerate



save "${pathdata_baseline}/BaselineExperimentRecall.dta", replace



















